In [3]:
# Build an RDD from a small in-memory list using the SparkContext `sc`
# (`sc` is not created in any cell visible here; it must already exist).
rdd = sc.parallelize([1, 2, 3])

In [4]:
# Evaluating the bare name shows the RDD's repr (type and lineage info),
# not its elements.
rdd


Out[4]:
ParallelCollectionRDD[1] at parallelize at PythonRDD.scala:315

In [5]:
# Number of elements in the RDD (3 for the list [1, 2, 3]).
rdd.count()


Out[5]:
3

In [6]:
# Describe a new RDD in which every element of `rdd` is incremented by one.
# This only records the transformation; nothing is computed at this point.
rdd2 = rdd.map(lambda value: value + 1)

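Lazy evaluation: `map` does not compute anything yet. Spark only records the transformation in a graph, so that it can plan the best way to apply all the functions once a result is actually requested (e.g. by `count()` or `collect()`).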


In [10]:
# NOTE(review): this call fails. RDD objects have no `toSeries` attribute, so
# the cell raises AttributeError (see the traceback recorded below) and
# `tryseries` is never assigned. To get the elements locally, use
# `rdd.collect()`, which returns a plain Python list.
tryseries=rdd.toSeries()


---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-10-093062419539> in <module>()
----> 1 tryseries=rdd.toSeries()

AttributeError: 'RDD' object has no attribute 'toSeries'

In [13]:
# Materialise the RDD's elements as a plain Python list ([1, 2, 3] here).
rdd.collect()


Out[13]:
[1, 2, 3]

In [14]:
# NOTE(review): in IPython, `_` normally holds the most recent cell output.
# Assigning to it here shadows that convenience variable; confirm this is
# intended, or use a descriptive name instead.
_=1

In [16]:
# Scratch assignment; `a` is not used by any cell visible here.
a = 2

In [22]:
import numpy as np

In [ ]:
def func(x):
    """Return ``x`` incremented by one.

    Named equivalent of the ``lambda x: x+1`` used with ``rdd.map`` earlier
    in the notebook.
    """
    incremented = x + 1
    return incremented

In [18]:
# Build an RDD from the image file names and map `load` over it.
# NOTE(review): `load` is not defined in any cell visible here; define or
# import it before running this cell, otherwise it raises NameError.
files = ['a.png', 'b.png']
# Fixed typo: the SparkContext method is `parallelize`; the original
# `paralllize` raised AttributeError.
sc.parallelize(files).map(load)

In [26]:
# Rebind `rdd` to a new RDD of (key, value) tuples. The earlier [1, 2, 3]
# RDD is no longer reachable through this name after this cell runs.
rdd = sc.parallelize([(1, 'a'), (2, 'g')])

In [28]:
# Sum the keys (the first element of each tuple): 1 + 2 == 3.
rdd.keys().reduce(lambda left, right: left + right)


Out[28]:
3

In [ ]: